import torch

from attacks import Attack
import torch.nn.functional as F

from constants import DEVICE
from utils import cross_entropy_loss, de_normalization, normalization


class VMIFGSM(Attack):
    """VMI-FGSM: momentum iterative FGSM with gradient variance tuning.

    At every step the gradient at the current adversarial example is
    corrected by ``v``, the difference between the mean gradient of ``N``
    randomly perturbed neighbours and the gradient at the point itself
    (computed during the previous step). The corrected gradient is
    normalised by its mean absolute value, accumulated with momentum, and
    the image is moved by ``alpha * sign(g)`` in de-normalized space.
    """

    def __init__(self, model, eps=16 / 255, steps=10, decay=1.0, N=20, beta=1.5, num_classes=1000):
        """

        :param model: DNN model
        :param eps: the maximum perturbation, applied to the de-normalized image
        :param steps: the number of iterations
        :param decay: the decay factor of the accumulated gradient
        :param N: the number of sampled examples used to estimate the variance term
        :param beta: neighbourhood factor; sampling noise is drawn uniformly
            from ``[-beta * eps, beta * eps)``
        :param num_classes: width of the one-hot target vectors passed to the loss
            (defaults to 1000, the value that used to be hard-coded)
        """
        super().__init__("VMIFGSM", model)
        self.eps = eps
        self.steps = steps
        # Per-step size chosen so that `steps` updates can span the whole eps budget.
        self.alpha = self.eps / self.steps
        self.decay = decay
        self.N = N
        self.beta = beta
        self.num_classes = num_classes

    def uniform_distribution(self, size):
        """Return a tensor of shape ``size`` on ``DEVICE`` sampled uniformly from
        ``[-beta * eps, beta * eps)``."""
        return torch.rand(size, device=DEVICE) * 2 * self.beta * self.eps - self.beta * self.eps

    def forward(self, images, labels):
        """Craft adversarial examples for ``images``.

        :param images: batch of inputs in the model's (normalized) input space;
            must have at least 4 dimensions because the gradient is reduced
            over dims ``(1, 2, 3)``
        :param labels: integer class indices, one per image
        :return: adversarial images in the same space as ``images``, detached
            from the autograd graph
        """
        targets = F.one_hot(labels.type(torch.int64), self.num_classes).float().to(DEVICE)

        # The eps-ball bounds are constants of the attack; build them from a
        # detached view so they never drag autograd history along.
        images_de_normalized = de_normalization(images.detach())
        images_min = torch.clamp(images_de_normalized - self.eps, min=0.0, max=1.0)
        images_max = torch.clamp(images_de_normalized + self.eps, min=0.0, max=1.0)

        g = torch.zeros_like(images)
        v = torch.zeros_like(images)
        adv = images.clone().detach()
        for _ in range(self.steps):
            # `adv` is a fresh leaf every step: autograd.grad needs an input
            # that requires grad, and re-leafing keeps the graph from growing
            # across iterations.
            adv.requires_grad_(True)
            loss = cross_entropy_loss(self.model(adv), targets)
            grad = torch.autograd.grad(loss, adv)[0]
            adv = adv.detach()

            # Current gradient tuned by the variance from the previous step.
            new_grad = grad + v

            # `adv` does not change inside the sampling loop, so de-normalize once.
            adv_de_normalized = de_normalization(adv)

            # Variance term for the NEXT step: mean neighbour gradient minus
            # the gradient at the current point.
            neighbour_grad_sum = torch.zeros_like(images)
            for _ in range(self.N):
                noisy = adv_de_normalized + self.uniform_distribution(adv.shape)
                x = normalization(noisy).detach().requires_grad_(True)
                neighbour_loss = cross_entropy_loss(self.model(x), targets)
                neighbour_grad_sum += torch.autograd.grad(neighbour_loss, x)[0]
            v = neighbour_grad_sum / self.N - grad

            # Normalise per sample by the mean absolute gradient. A sample
            # whose gradient is exactly zero would give 0 / 0 = NaN and poison
            # the image; dividing by 1 instead leaves its contribution at 0.
            grad_scale = torch.mean(torch.abs(new_grad), dim=(1, 2, 3), keepdim=True)
            grad_scale = torch.where(grad_scale > 0, grad_scale, torch.ones_like(grad_scale))
            g = self.decay * g + new_grad / grad_scale

            # Step in de-normalized space and project back into the eps-ball
            # (already intersected with the valid [0, 1] range).
            adv_de_normalized = torch.clamp(adv_de_normalized + self.alpha * torch.sign(g), min=images_min,
                                            max=images_max)
            adv = normalization(adv_de_normalized).detach()

        return adv
